package example;

import com.alisonyu.spider.Proccessor.Context;
import com.alisonyu.spider.Router.Router;
import com.alisonyu.spider.Spider;
import com.alisonyu.spider.SpiderConfig;
import io.vertx.core.json.JsonObject;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.util.List;

/**
 * Example spider that crawls the articles jandan.net published in April 2018.
 *
 * <p>The crawl is seeded with the site's front page. Article links found there are handed
 * back to the spider, and URLs containing {@code jandan.net/2018/04/} are routed to
 * {@link #handleArticlePage(Context)}, which prints each article's title and text.
 */
public class MemorySpider
{

    /** Seed URL; the main-page route matches this exact string. */
    private static final String MAIN_PAGE_URL = "http://jandan.net";

    /** URL fragment used to recognise article pages from April 2018. */
    private static final String ARTICLE_URL_FRAGMENT = "jandan.net/2018/04/";


    /**
     * Configures the spider (seed URL, routes, interval) and starts it.
     */
    public void start(){
        SpiderConfig config = new SpiderConfig();
        config.setInitUrls(new String[]{MAIN_PAGE_URL});
        Router router = new Router();
        // Exact match: only the seed URL itself is treated as the main page.
        router.route().filter(url->url.equals(MAIN_PAGE_URL)).handler(this::handleMainPage);
        router.route().filter(url->url.contains(ARTICLE_URL_FRAGMENT)).handler(this::handleArticlePage);
        config.setRouter(router);
        // NOTE(review): presumably the delay between requests; unit not visible here - confirm in SpiderConfig.
        config.setInterval(2);
        Spider spider = new Spider(config);
        spider.start();
    }

    /**
     * Handles the front page: extracts the article links and queues them for crawling.
     *
     * @param context crawl context giving access to the fetched response and the URL queue
     */
    public void handleMainPage(Context context){
        String html = context.getResponse().getContent();
        Document doc = Jsoup.parse(html);
        // Collect the href of every <a> inside an <h2> under an element with class "indexs".
        List<String> urls = doc.select(".indexs h2 a").eachAttr("href");
        // Add each article link to the queue of URLs waiting to be processed.
        for (String url:urls){
            System.out.println(url);
            context.addUrl(url);
        }
    }

    /**
     * Handles an article page: prints the title and the paragraph text of the {@code .post}
     * element. Pages that contain no {@code .post} element are skipped.
     *
     * @param context crawl context giving access to the fetched response
     */
    public void handleArticlePage(Context context){
        String html = context.getResponse().getContent();
        Document doc = Jsoup.parse(html);
        Elements post = doc.select(".post");
        // select() yields an empty Elements (never null) when nothing matches,
        // so emptiness is the check that actually detects a missing article body.
        if (post.isEmpty()){
            return;
        }
        String title = post.select("h1 a").text();
        // Join paragraphs with a newline so adjacent paragraphs do not run together.
        String content = String.join("\n", post.select("p").eachText());
        System.out.println("title:"+title);
        System.out.println("content:"+content);
        // Persist the article here if needed.
    }


    /**
     * Entry point: creates the example spider and starts the crawl.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args){
        MemorySpider spider = new MemorySpider();
        spider.start();
    }

}
